library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(readxl)
library(gganimate)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
c2015 <- read_excel(path='c2015.xlsx')
df <- c2015 %>%
filter_all(~!(.=="Unknown")) %>%
filter_all(~!(.=="Not Rep")) %>%
filter_all(~!(.=="Not Reported")) %>%
filter_all(~!(.==str_detect(.,'Not Rep'))) %>%
filter_all(~!(.==str_detect(.,'Unknown'))) %>%
filter_all(~!(.==str_detect(.,'Not Reported'))) %>%
filter(SEAT_POS=='Front Seat, Left Side') %>%
mutate(TRAV_SP = as.numeric(substr(TRAV_SP, 1, 3))) %>%
mutate(AGE = as.numeric(case_when(AGE=="Less than 1"~"0",
TRUE~AGE))) %>%
mutate(AGE=case_when(is.na(AGE) ~ mean(AGE,na.rm=TRUE),
TRUE ~ AGE)) %>%
mutate(SEX=case_when(is.na(SEX) | SEX=="Not Rep" | SEX=="Unknown" ~ "Female",
TRUE~SEX)) %>%
filter_all(~!is.na(.))
## Warning: NAs introduced by coercion
ggplot(df%>%
group_by(SEX,INJ_SEV,MONTH)%>%
summarise(avg=mean(TRAV_SP))%>%
mutate(speedZ=(avg-mean(avg))/sd(avg))%>%
mutate(spType=case_when(speedZ<0~'Below', TRUE~'Above')),
aes(x=MONTH, y=speedZ, label=speedZ))+
geom_bar(stat='identity', aes(fill=spType, width=0.8))+
scale_fill_manual(name='Monthly Speed',
labels = c('Above Average', 'Below Average'),
values = c("Above"="#00ba38", "Below"="#f8766d"))+
labs(subtitle='Normalized average monthly speeds',
title='Diverging Bars')+
coord_flip()+
transition_states(MONTH)+
labs(title='MONTH={closest_state}')
## Warning: Ignoring unknown aesthetics: width
ggplot(df, aes(x=DRINKING, fill=SEX))+
geom_bar()+
transition_states(MONTH)+
labs(title='MONTH={closest_state}')
ggplot(hdcd, aes(x=Student.Loan, y=Credit.Card))+
geom_line()
hdcd$observation <- 1:nrow(hdcd)
ggplot(hdcd, aes(x=Student.Loan, y=Credit.Card))+
geom_line()+
transition_reveal(observation)
hdcd$date<-seq.Date(as.Date('2003-01-01'), as.Date('2019-06-01'), by = 'quarter')
ggplot(hdcd, aes(x=date, y=Student.Loan))+
geom_line()
ggplot(hdcd, aes(x=date, y=Student.Loan))+
geom_line()+
transition_reveal(date)
ggplot(hdcd, aes(x=date, y=Student.Loan))+
geom_point()+
transition_reveal(date)+
geom_line()+
geom_text(aes(label=Student.Loan))
temp<-hdcd%>%
gather(Value, key=Debt, c(Mortgage, Credit.Card, Student.Loan, Auto.Loan, HE.Revolving, Other))
ggplot(temp, aes(x=date, y=Value,color=Debt))+
geom_line()+
geom_point()+
geom_text(aes(label=Value))+
transition_reveal(date)
hdcd %>%
select(-c(observation,date,Quarter))%>%
as.matrix()%>%
rcorr(type="spearman")
## Mortgage HE.Revolving Auto.Loan Credit.Card Student.Loan
## Mortgage 1.00 0.38 0.39 0.77 0.49
## HE.Revolving 0.38 1.00 -0.36 0.15 -0.22
## Auto.Loan 0.39 -0.36 1.00 0.36 0.76
## Credit.Card 0.77 0.15 0.36 1.00 0.13
## Student.Loan 0.49 -0.22 0.76 0.13 1.00
## Other -0.04 -0.24 -0.12 0.52 -0.57
## Total 0.93 0.11 0.63 0.71 0.72
## Other Total
## Mortgage -0.04 0.93
## HE.Revolving -0.24 0.11
## Auto.Loan -0.12 0.63
## Credit.Card 0.52 0.71
## Student.Loan -0.57 0.72
## Other 1.00 -0.11
## Total -0.11 1.00
##
## n= 66
##
##
## P
## Mortgage HE.Revolving Auto.Loan Credit.Card Student.Loan
## Mortgage 0.0015 0.0013 0.0000 0.0000
## HE.Revolving 0.0015 0.0028 0.2206 0.0748
## Auto.Loan 0.0013 0.0028 0.0034 0.0000
## Credit.Card 0.0000 0.2206 0.0034 0.2907
## Student.Loan 0.0000 0.0748 0.0000 0.2907
## Other 0.7550 0.0545 0.3574 0.0000 0.0000
## Total 0.0000 0.3802 0.0000 0.0000 0.0000
## Other Total
## Mortgage 0.7550 0.0000
## HE.Revolving 0.0545 0.3802
## Auto.Loan 0.3574 0.0000
## Credit.Card 0.0000 0.0000
## Student.Loan 0.0000 0.0000
## Other 0.3639
## Total 0.3639
Mortgage is the most correlated with the total.
temp2<-hdcd%>%
gather(Value, key=Debt, c(Mortgage, Credit.Card, Student.Loan, Auto.Loan, HE.Revolving, Other, Total))
temp2 %>%
filter(Debt=="Mortgage"|Debt=="Total") %>%
filter(str_detect(Quarter, 'Q1'))%>%
ggplot(aes(x=date, y=Value,color=Debt))+
geom_line()+
geom_point()+
geom_text(aes(label=Value))+
transition_reveal(date)+
labs(title='Mortgage and Total debt by Year',
subtitle = "Mortgage is the most correlated type of debt with the overall debt.")
temp2 %>%
filter(Debt != "Mortgage" & Debt != "Total") %>%
filter(str_detect(Quarter, 'Q1'))%>%
ggplot(aes(x=date, y=Value,color=Debt))+
geom_line()+
geom_point()+
geom_text(aes(label=Value))+
transition_reveal(date)+
labs(title='Non-Mortgage debt sources',
subtitle = 'These 6 debt types are less correlated with the total debt')
Its unfortunate to see that student loan debt is rising drastically compared to all other debts other than auto. Its interesting to see how correlated the rise in total debt is to mortage debt. They almost move in unison.
ggplot(temp, aes(x=Debt, y=Value, fill=Debt))+
geom_bar(stat="identity")+
transition_states(date)+
labs(title='{closest_state}')
temp%>%
ggplot(aes(x=1,y=Value, fill=Debt))+
geom_bar(stat="identity", position = "fill")+
transition_states(date)+
labs(title="Debt by quarter",
subtitle = '{closest_state}')
temp%>%
filter(Debt!="Mortgage")%>%
ggplot(aes(x=date, y=Value,color=Debt))+
geom_line(position = "jitter", size=6)+
geom_point(position="jitter")+
transition_reveal(date)